setwd("/Users/Kadeem Gilbert/Desktop/Microbiome project/R_analysis")

### Start by analyzing eukaryotic datasets 

eotu <- read.table("otu_table_Eukaryota_fixed3.txt",sep="\t",head=T,row.names=2) #read in OTU table
eotu.notax <- eotu[,2:ncol(eotu)] # remove taxonomy
summary(rowSums(eotu.notax))
summary(colSums(eotu.notax))
eotu.t <- t(eotu.notax) # transpose rows and columns
eotu.t <- eotu.t[order(row.names(eotu.t)),] #order row names

mdat <- read.table("MicrobiomeMetadata.txt", sep="\t", head=T, row.names=1) #get metadata
mdat <- mdat[order(row.names(mdat)),] # order row names
emdat <- subset(mdat, row.names(mdat) %in% row.names(eotu.t)) # subset, because metadata had more samples
rownames(emdat) == rownames(eotu.t) # sanity check

summary(rowSums(eotu.t))
summary(colSums(eotu.t))
eotu.t1K <- eotu.t[rowSums(eotu.t) > 1000,] # keep only samples with at least 1K seqs
summary(rowSums(eotu.t1K))
summary(colSums(eotu.t1K))
eotu.t1K <- eotu.t1K[,colSums(eotu.t1K) > 0] 


if (!require("vegan")) {install.packages("vegan"); require("vegan")}

eotu.t1K.r <- rrarefy(eotu.t1K,1103) # rarefy to ~1K for all samples

summary(colSums(eotu.t1K.r))
eotu.t1K.r <- eotu.t1K.r[,colSums(eotu.t1K.r) > 0] # get rid of OTUs with 0 observations in rarefied table
summary(rowSums(eotu.t1K.r))

emdat1 <- subset(emdat, row.names(emdat) %in% row.names(eotu.t1K.r)) # Have to trim down the  metadata because we got rid of low-abundance samples


eotu.bc.nmds <- metaMDS(eotu.t1K.r, trymax = 50) # NMDS analysis
eotu.bc.nmds


#### Subset to HortPark
eotu.h <- subset(eotu.t, emdat$Project=="HortPark")
emdat.h <- subset(emdat, row.names(emdat) %in% row.names(eotu.h)) 

emdat.h$Project <- emdat.h$Project[drop=T] # drop any categories that are no longer in the subsetted metadata file
emdat.h$Species <- emdat.h$Species[drop=T]
emdat.h$Canopy <- emdat.h$Canopy[drop=T]
emdat.h$Width <- emdat.h$Width[drop=T]
emdat.h$Length <- emdat.h$Length[drop=T]
emdat.h$Volume <- emdat.h$Volume[drop=T]
emdat.h$pH <- emdat.h$pH[drop=T]
emdat.h$Elevation <- emdat.h$Elevation[drop=T]
emdat.h$Plant <- emdat.h$Plant[drop=T]
emdat.h$Pitcher <- emdat.h$Pitcher[drop=T]
emdat.h$Fluid_Color <- emdat.h$Fluid_Color[drop=T]
emdat.h$Viscosity <- emdat.h$Viscosity[drop=T]
emdat.h$Prey <- emdat.h$Prey[drop=T]

summary(rowSums(eotu.h))
eotu.h <- eotu.h[rowSums(eotu.h) > 1000,] 
summary(colSums(eotu.h))
eotu.h <- eotu.h[,colSums(eotu.h) > 0] # get rid of OTUs with 0 observations in rarefied table

emdat.h <- subset(emdat.h, row.names(emdat.h) %in% row.names(eotu.h)) # Have to trim down the  metadata because we got rid of low-abundance samples

eotu.h.r <- rrarefy(eotu.h,1675) # rarefy to 1K for all samples
summary(colSums(eotu.h.r))
eotu.h.r <- eotu.h.r[,colSums(eotu.h.r) > 0] 


##### Repeat everything with bacteria

botu <- read.table("otu_table_Bacteria_fixed3.txt",sep="\t",head=T,row.names=2) #read in OTU table
botu.notax <- botu[,2:ncol(botu)] # remove taxonomy
summary(rowSums(botu.notax))
summary(colSums(botu.notax))
botu.t <- t(botu.notax) # transpose rows and columns
botu.t <- botu.t[order(row.names(botu.t)),] #order row names

#mdat <- read.table("Metadata_HP_M.txt", sep="\t", head=T, row.names=1) #get metadata
#mdat <- mdat[order(row.names(mdat)),] # order row names
bmdat <- subset(mdat, row.names(mdat) %in% row.names(botu.t)) # subset, because metadata had more samples
rownames(bmdat) == rownames(botu.t) # sanity check

summary(rowSums(botu.t))
summary(colSums(botu.t))
botu.t1K <- botu.t[rowSums(botu.t) > 1000,] # keep only samples with at least 1K seqs
summary(rowSums(botu.t1K))
summary(colSums(botu.t1K))
botu.t1K <- botu.t1K[,colSums(botu.t1K) > 0] 


#if (!require("vegan")) {install.packages("vegan"); require("vegan")}

botu.t1K.r <- rrarefy(botu.t1K,1311) # rarefy to ~1K for all samples

summary(colSums(botu.t1K.r))
botu.t1K.r <- botu.t1K.r[,colSums(botu.t1K.r) > 0] # get rid of OTUs with 0 observations in rarefied table
summary(rowSums(botu.t1K.r))

bmdat1 <- subset(bmdat, row.names(bmdat) %in% row.names(botu.t1K.r)) # Have to trim down the  metadata because we got rid of low-abundance samples


#### Subset to HortPark
botu.h <- subset(botu.t, bmdat$Project=="HortPark")
bmdat.h <- subset(bmdat, row.names(bmdat) %in% row.names(botu.h)) 

bmdat.h$Project <- bmdat.h$Project[drop=T] # drop any categories that are no longer in the subsetted metadata file
bmdat.h$Species <- bmdat.h$Species[drop=T]
bmdat.h$Canopy <- bmdat.h$Canopy[drop=T]
bmdat.h$Width <- bmdat.h$Width[drop=T]
bmdat.h$Length <- bmdat.h$Length[drop=T]
bmdat.h$Volume <- bmdat.h$Volume[drop=T]
bmdat.h$pH <- bmdat.h$pH[drop=T]
bmdat.h$Elevation <- bmdat.h$Elevation[drop=T]
bmdat.h$Plant <- bmdat.h$Plant[drop=T]
bmdat.h$Pitcher <- bmdat.h$Pitcher[drop=T]
bmdat.h$Fluid_color <- bmdat.h$Fluid_color[drop=T]
bmdat.h$Viscosity <- bmdat.h$Viscosity[drop=T]
bmdat.h$Prey <- bmdat.h$Prey[drop=T]

summary(rowSums(botu.h))
botu.h <- botu.h[rowSums(botu.h) > 1000,] 
summary(colSums(botu.h))
botu.h <- botu.h[,colSums(botu.h) > 0] # get rid of OTUs with 0 observations in rarefied table

bmdat.h <- subset(bmdat.h, row.names(bmdat.h) %in% row.names(botu.h)) # Have to trim down the  metadata because we got rid of low-abundance samples

botu.h.r <- rrarefy(botu.h,1328) # rarefy to 1K for all samples
summary(colSums(botu.h.r))
botu.h.r <- botu.h.r[,colSums(botu.h.r) > 0] 


### Phyloseq

## Install phyloseq 
require(phyloseq)
if (!require("picante")) {install.packages("picante"); require("picante")}

eotu.tree <- read.tree("rep_set_18S.tre") ## rename the 18S tree and make sure it is in your current directory, then read it in with picante

row.names(eotu.h.r) == row.names(emdat.h)

## Prune tree and put into format for phyloseq
eotu.h.tree <- prune.sample(eotu.h.r, eotu.tree)
OTU.eotu.h = otu_table(as.matrix(eotu.h.r), taxa_are_rows = FALSE)
SAM.eotu.h = sample_data(emdat.h)
eotu.h.physeq <-merge_phyloseq(phyloseq(OTU.eotu.h),SAM.eotu.h,eotu.h.tree)

## Measure Unifrac distance and run NMDS
eotu.h.uu.dist <- distance(eotu.h.physeq,"uUniFrac")
eotu.h.uu.nmds <- metaMDS(eotu.h.uu.dist, trymax=800)

##calculate betadiversity, assess homogeneity of variance
eotu.beta <- betadisper(eotu.h.uu.dist, emdat.h$Species)
eotu.beta
permutest(eotu.beta)

## Plot NMDS by pH with ordisurf 
emdat.h.pH <- as.factor(emdat.h$pH)
customcol6 <- c("red","orange","yellow","palegreen2","dodgerblue2","purple4")

plot(eotu.h.uu.nmds$points, xlab="NMDS Axis 1", ylab="NMDS Axis 2", 
     main="Nepenthes eukaryotic community by pH", 
     col= customcol6[emdat.h.pH],
     pch =19)
ordisurf(eotu.h.uu.nmds,emdat.h$pH, add=T, col="darkgray")

##Plot NMDS by species with ordispider
customcol.e<-c("darkred","orange","yellow","green","cadetblue4","mediumblue","deeppink","indianred","darkgoldenrod","mediumspringgreen","darkgreen","darkmagenta")
plot(eotu.h.uu.nmds$points[,1:2], xlab="NMDS Axis 1", ylab="NMDS Axis 2", 
     main="Eukaryotic community by species", 
     col= customcol.e[emdat.h$Species],
     pch=19)
ordispider(eotu.h.uu.nmds, emdat.h$Species, col=customcol.e, label = F)

## Run mantel test and ordisurf on pH and volume
eotu.h.pH <- vegdist(emdat.h$pH,method="euclidean") 
eotu.hpH.man <- mantel(eotu.h.uu.dist,eotu.h.pH, permutations=999)
eotu.hpH.man

##Run CCA on pH and volume
eotu.pH.cca <- cca(eotu.h.r ~ pH, data=emdat.h)
anova(eotu.pH.cca)

##Run adonis on species, viscosity, and fluid color
eotu.h.ad1 <- adonis(eotu.h.uu.dist ~ Species, data=emdat.h)
eotu.h.ad1

#Repeat everything for bacteria
botu.tree <- read.tree("rep_set_16S.tre") ## rename the 16S tree and make sure it is in your current directory, then read it in with picante

row.names(botu.h.r) == row.names(bmdat.h)

## Prune tree and put into format for phyloseq
botu.h.tree <- prune.sample(botu.h.r, botu.tree)
OTU.botu.h = otu_table(as.matrix(botu.h.r), taxa_are_rows = FALSE)
SAM.botu.h = sample_data(bmdat.h)
botu.h.physeq <-merge_phyloseq(phyloseq(OTU.botu.h),SAM.botu.h,botu.h.tree)

## Measure Unifrac distance and run NMDS
botu.h.uu.dist <- distance(botu.h.physeq,"uUniFrac")
botu.h.uu.nmds <- metaMDS(botu.h.uu.dist, trymax=800)

##calculate betadiversity, assess homogeneity of variance
botu.beta <- betadisper(botu.h.uu.dist, bmdat.h$Species)
botu.beta
permutest(botu.beta)

## Plot NMDS by pH with ordisurf 
bmdat.h.pH <- as.factor(bmdat.h$pH)
customcol6 <- c("red","orange","yellow","palegreen2","dodgerblue2","purple4")

plot(botu.h.uu.nmds$points, xlab="NMDS Axis 1", ylab="NMDS Axis 2", 
     main="Nepenthes bacterial community by pH", 
     col= customcol6[bmdat.h.pH],
     pch =19)
ordisurf(botu.h.uu.nmds,bmdat.h$pH, add=T, col="darkgray")

##plot NMDS by species with ordispider
customcol.e<-c("darkred","orange","yellow","green","cadetblue4","mediumblue","deeppink","indianred","darkgoldenrod","mediumspringgreen","darkgreen","darkmagenta")
customcol.b<-c("darkred","orange","yellow","green","cadetblue4","red","mediumblue","deeppink","indianred","darkgoldenrod","mediumspringgreen","darkgreen","darkmagenta")
plot(botu.h.uu.nmds$points[,1:2], xlab="NMDS Axis 1", ylab="NMDS Axis 2", 
     main="Bacterial community by species", 
     col= customcol.b[bmdat.h$Species],
     pch=19)
ordispider(botu.h.uu.nmds, bmdat.h$Species, col=customcol.b, label = F) 
legend(locator(1), 
       legend=c("BOS","COP","EYM","FUS","INE","JAC","KHA","MAX","RAM","SAN","SIN","TEN","TRU"),#add JAC for bacteria
       col= customcol.b,
       pch=19,
       cex=0.95,
       y.intersp=0.22,
       x.intersp=0.12,
       inset=0,
       bty = "n")

## Run mantel test and ordisurf on pH and volume
botu.h.pH <- vegdist(bmdat.h$pH,method="euclidean") 
botu.hpH.man <- mantel(botu.h.uu.dist,botu.h.pH, permutations=999)
botu.hpH.man

##Run CCA on pH and volume
botu.pH.cca <- cca(botu.h.uu.dist ~ pH, data=bmdat.h)
anova(botu.pH.cca)

##Run adonis on species, viscosity, and fluid color
botu.h.ad1 <- adonis(botu.h.uu.dist ~ Species, data=bmdat.h)
botu.h.ad1

